* THIS DO-FILE PREPARES THE FINAL VARIABLES FOR REGRESSIONS

* Start from an empty session: no data, programs or stored results in memory.
clear all
* Raise the ceiling on the number of variables a dataset may hold.
set maxvar 32000
* Root folder of the raw input files, referenced further down through the
* global macro raw. The value is a placeholder and contains spaces, so any
* path built from it has to be wrapped in double quotes.
global raw "Path to rawdata"


***************************************************
**** SETTING UP DATASETS FOR REGRESSIONS

* DEATH
* Loads the death register from the raw-data folder, derives the calendar
* year of death from doddato and stores the result in temp.dta for the
* merge into the sample further down.
* The path is fully double-quoted: the original had a stray closing quote
* (a syntax error), and the raw folder name may contain spaces.
use "$raw/dod2016.dta", clear
gen deathyear = year(doddato)
* NOTE(review): deathyear is shifted back by one, so the death flag set in
* the merge step is switched on in the year before the recorded death.
* Presumably intentional (last full calendar year alive) - confirm.
replace deathyear = deathyear - 1
save temp, replace

* ONET OCCUPATIONS
* Builds tempisco.dta: one occupation code per pnr-year (ISCO-08 and/or
* ISCO-88), each matched to its crosswalk file in the raw-data folder.
* Changes relative to the earlier version: -use- takes the documented
* clear option, and every path built from the raw global is double-quoted
* so that a folder name containing spaces does not break the command.
use reducedsample_v7_incl65yo, clear
keep pnr year
sort pnr year
merge 1:1 pnr year using "person-isco_v1", keep(mat mas) nogen

* ISCO-08: start from the first source and fall back, in this order, to the
* next one whenever the code is still missing or equal to 9999.
gen isco08 = DISCO08_ALLE_INDK_13
foreach src in DISCO08_LOEN_INDK DISCO08_ALLE_INDK DISCO08_SEL_INDK {
	replace isco08 = `src' if isco08 == . | isco08 == 9999
}

* ISCO-88: same fallback logic; here 0 is also treated as "no code".
gen isco88 = DISCOALLE_INDK
foreach src in DISCO_ALLE_INDK_13 DISCOLOEN_INDK DISCOSEL_INDK {
	replace isco88 = `src' if isco88 == . | isco88 == 0 | isco88 == 9999
}
drop DISCO*

* Codes that are still 0 or 9999 after the fallbacks are recoded to 99999.
replace isco88 = 99999 if isco88 == 0 | isco88 == 9999
replace isco08 = 99999 if isco08 == 9999
drop if isco88 == . & isco08 == .

* Match the ISCO-88 rows to their crosswalk and park them on disk ...
preserve
keep if isco88 != .
merge m:1 isco88 using "$raw/isco88_small", nogen keep(mat mas)
save tempisco, replace
restore

* ... then match the ISCO-08 rows and stack both sets.
* NOTE(review): a pnr-year with both codes non-missing ends up in both
* halves and is therefore duplicated after the append; the later 1:1 merge
* on pnr year would then fail. Presumably the two codes never overlap in
* the data - confirm.
keep if isco08 != .
merge m:1 isco08 using "$raw/isco08_small", nogen keep(mat mas)
append using tempisco
save tempisco, replace

* MERGING INTO SAMPLE
* Reloads the base sample (with the documented clear option of -use-) and
* attaches the year of death stored in temp.dta.
use reducedsample_v7_incl65yo, clear
merge m:1 pnr using temp, nogen keep(mat mas) keepus(deathyear)
* Switch the existing death flag on in the person-year that matches
* deathyear, and fill every remaining missing value of the flag with 0.
replace death = 1 if year == deathyear
replace death = 0 if death == .
drop deathyear
* The intermediate death file is no longer needed.
erase temp.dta
* Attach the occupation information built in tempisco.dta.
merge 1:1 pnr year using tempisco, keep(mat mas) nogen

* Collapse the occupation dummies into one categorical variable OCC.
* The dummies are processed in the listed order and a later dummy
* overwrites an earlier one, so when several dummies equal 1 the last
* one in the list determines OCC.
gen OCC = .
local code = 0
foreach grp in management decision pressure artistic social social_interest unstructured unstructured_work {
	local ++code
	replace OCC = `code' if D`grp' == 1
}

* The intermediate occupation file is no longer needed.
erase tempisco.dta

* MARRIAGE AND DIVORCE
* Pull the civil-status code from the raw panel and turn it into two 0/1
* indicators; the code itself is dropped afterwards.
merge 1:1 pnr year using panelraw_v7, keep(mat mas) keepus(civst) nogen
* MARRIED is 1 for codes G and P, DIVORCE is 1 for codes F and O.
gen MARRIED = inlist(civst, "G", "P")
gen DIVORCE = inlist(civst, "F", "O")
drop civst

* Checkpoint: sample with death, occupation and marital-status variables.
save reducedsample_v8, replace

***************************************************
**** GENERATING EARNINGS AND MENTAL HEALTH VARIABLES

* Reload the checkpoint written above (clear is the documented option of -use-).
use reducedsample_v8, clear

* UNCOMMENT THE LINE BELOW TO EXCLUDE INDIVIDUALS OLDER THAN 60
* (as shipped, the line is inactive and older individuals stay in the sample)
*drop if age > 60
* Quick check of the age and birth-year ranges in the sample.
su age birthyear

* Give the analysis variables their final upper-case names.
* The list holds old-name/new-name pairs and is consumed two tokens at a
* time, so the renames run in exactly the listed order.
local pairs pnr PNR age AGE year YEAR birthyear BIRTHCOHORT female FEMALE
local pairs `pairs' earn EARN income INC MOR_ID MOMID death DEATH
local pairs `pairs' lms_disability DISAB college COLLEGE gymnasium HS
local pairs `pairs' med_benzo_ev BENZO med_lit_ev LITHIUM med_ssri_ev SSRI med_ap2_ev AP2G
local pairs `pairs' depres_nobipman_ev DEPR schizo_nobipman_ev SCHIZO
local pairs `pairs' anxiety_wide_ev ANX bipman_ev BD
while "`pairs'" != "" {
	gettoken from pairs : pairs
	gettoken to pairs : pairs
	rename `from' `to'
}

* POST is 1 for birth cohorts after 1956 and 0 otherwise.
gen POST = BIRTHCOHORT > 1956

* Education and marital-status flags: recode missing values to 0, then
* replace every person-year with the person's maximum over all years, so
* each flag becomes constant within PNR (1 if it is ever 1).
mvencode COLLEGE HS MARRIED DIVORCE, mv(0) override

* Pairs of flag / helper variable holding the within-person maximum.
local pairs COLLEGE MAX_COL HS MAX_HS MARRIED MAX_MARRIED DIVORCE MAX_DIVORCE
while "`pairs'" != "" {
	gettoken flag pairs : pairs
	gettoken ever pairs : pairs
	bysort PNR (YEAR): egen `ever' = max(`flag')
	replace `flag' = `ever'
}
drop MAX_*

* MOMID_S is 1 when a mother ID is recorded, 0 when MOMID is empty.
gen MOMID_S = !missing(MOMID)

* MENTAL HEALTH VARIABLES

* BD is widened to "BD indicator or lithium indicator equal to 1". The new
* variable is built under a scratch name and then takes over the name BD.
gen BDLIT = (BD == 1) | (LITHIUM == 1)
drop BD
rename BDLIT BD
label var BD "BD"
gen BDPOST = BD * POST

* Depression/anxiety: any of the DEPR, ANX, SSRI or BENZO indicators is 1.
gen DEPRANX = (DEPR == 1) | (ANX == 1) | (SSRI == 1) | (BENZO == 1)
label var DEPRANX "Depr/Anx"
label var DEPR "Depression"
label var ANX "Anxiety, incl PTSD & OCD"

* SCHIZO is widened the same way as BD, using the AP2G indicator.
gen SCHIZO_AP2G = (SCHIZO == 1) | (AP2G == 1)
drop SCHIZO
rename SCHIZO_AP2G SCHIZO
label var SCHIZO "Schizophrenia"

* Any mental-health condition. ANX, SSRI and BENZO are already contained
* in DEPRANX, and LITHIUM is already contained in the widened BD, so the
* three composite indicators cover every component.
gen MENTAL = (DEPRANX == 1) | (SCHIZO == 1) | (BD == 1)
label var MENTAL "MH condition"

gen MENTALPOST = MENTAL * POST
label var MENTALPOST "MH x Post"

* Interactions with FEMALE, and with FEMALE and POST.
foreach mh in BD MENTAL {
	gen FEMALE_`mh' = `mh' * FEMALE
	gen FEMALE_`mh'_POST = `mh' * FEMALE * POST
}

* Within-person total of bipman over all years.
* NOTE(review): bipman is not created in this file; presumably a
* person-year BD indicator that comes with the input data - confirm.
bysort PNR (YEAR): egen NUMBD = total(bipman)
label var NUMBD "# of BD"

* EARNINGS AND INCOME VARIABLES

* CONVERTING FROM 2015-LEVEL DANISH KRONER TO 2015-LEVEL US DOLLARS
local dkk_to_usd 0.14641288
foreach v in EARN INC {
	replace `v' = `v' * `dkk_to_usd'
}

* For earnings and for income:
*   LNx  = log of x, set to 0 wherever the log is missing
*   NOx  = 1 if x is zero or missing
*   POSx = 1 if NOx is 0
foreach v in EARN INC {
	gen LN`v' = ln(`v')
	replace LN`v' = 0 if LN`v' == .
	gen NO`v' = `v' == 0 | `v' == .
	gen POS`v' = NO`v' == 0
}

* Position in the pooled earnings distribution. EARN does not change in
* this section, so one detailed summary supplies all four cut-offs; they
* are copied verbatim from the stored results.
quietly summarize EARN, detail
local cut90 `r(p90)'
local cut75 `r(p75)'
local cut10 `r(p10)'
local cut25 `r(p25)'

* The indicators are left missing where EARN is missing.
gen TOP10 = EARN > `cut90' if EARN != .
gen TOP25 = EARN > `cut75' if EARN != .
gen BOTTOM10 = EARN <= `cut10' if EARN != .
gen BOTTOM25 = EARN <= `cut25' if EARN != .

* SIBLING VARIABLES
* Families are identified by the mother ID. Observations with an empty
* MOMID are excluded from the family statistics: grouping on MOMID would
* otherwise pool all of them into one artificial family, so that a single
* BD case would mark every other person without a mother ID as a BD
* sibling. Their family variables are left missing, in line with NRSIB.

* BDFAM: someone in the family has BD. BDSIB: family has BD, person does not.
bys MOMID: egen BDFAM = max(BD) if MOMID != ""
gen BDSIB = BDFAM == 1 & BD == 0 if MOMID != ""
label var BDSIB "BD sibling"

* BDP: someone in the family has BD and belongs to the post cohort; reset
* to 0 for the persons who are themselves BD in the post cohort.
bys MOMID: egen BDP = max(BDPOST) if MOMID != ""
replace BDP = 0 if BDPOST == 1 & BDSIB == 0
label var BDP "BD sibling in post cohort"

* NUMBER OF SIBLINGS
* Tag one observation per person-mother pair, count the tags within the
* family and subtract the person itself. total() is the current name of
* the egen function formerly called sum().
egen t = tag(PNR MOMID)
bys MOMID: egen NRSIB = total(t)
replace NRSIB = NRSIB - 1
replace NRSIB = . if MOMID == ""
drop t

* PARENTAL WEALTH VARIABLES

* missingparents: neither a mother ID nor a father ID is recorded.
gen missingparents = MOMID == "" & FAR_ID == ""
* missingwealth: the parental asset measure is missing.
gen missingwealth = par_pc_assets == .

* Three mutually exclusive 0/1 groups by par_pc_assets, with cut-offs at
* 25 and 75. All three are 0 when the measure is missing; TOPWEALTH needs
* the explicit guard because a missing value compares as larger than 75.
* NOTE(review): the labels below imply par_pc_assets is a percentile
* rank between 0 and 100 - confirm.
gen TOPWEALTH = par_pc_assets >= 75 & missingwealth == 0
gen MIDWEALTH = par_pc_assets > 25 & par_pc_assets < 75
gen BOTTOMWEALTH = par_pc_assets <= 25

label var TOPWEALTH "Parental assets in top 25%"
label var MIDWEALTH "Parental assets between top 25% and bottom 25%"
label var BOTTOMWEALTH "Parental assets in bottom 25%"
rename par_pc_assets WEALTH

* Write the final analysis file and empty the memory.
save data2025, replace
clear